--- title: DataBunch for Super Resolution keywords: fastai sidebar: home_sidebar ---
{% raw %}
{% endraw %} {% raw %}
%reload_ext autoreload
%autoreload 2
%matplotlib inline
{% endraw %} {% raw %}
import os
import cv2
import numpy as np
import re
import random
from tqdm import tqdm
from matplotlib import pyplot as plt
{% endraw %} {% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
import sys
sys.path.append('..')
from superres.datasets import *
from superres.srcnn import *
#from psnr.psnr import *
{% endraw %} {% raw %}
seed = 8610
random.seed(seed)
np.random.seed(seed)
{% endraw %}

after_open function of ImageImageList

{% raw %}
{% endraw %} {% raw %}

resize_image[source]

resize_image(img:PIL.Image, size:int)

resize PIL-image

{% endraw %} {% raw %}
il = ImageImageList.from_folder(set14_hr, after_open=partial(resize_image, size=64))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
{% endraw %} {% raw %}

lr_image[source]

lr_image(img:PIL.Image, scale:int, sizeup:bool=False, size:int=None)

create low resolution image. sizeup=False : size=original_size // scale sizeup=True and size=size: upsize to size. sizeup=True and size=None: upsize to original size.

{% endraw %} {% raw %}
# scale=4
il = ImageImageList.from_folder(set14_hr, after_open=partial(lr_image, scale=4))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 72, 88),Image (3, 69, 69),Image (3, 97, 146),Image (3, 128, 128),Image (3, 128, 128)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# scale=4, resize=True
il = ImageImageList.from_folder(set14_hr, after_open=partial(lr_image, scale=4, sizeup=True))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 288, 352),Image (3, 276, 276),Image (3, 391, 586),Image (3, 512, 512),Image (3, 512, 512)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# scale=4, resize=True, size=256
il = ImageImageList.from_folder(set14_hr, after_open=partial(lr_image, scale=4, sizeup=True, size=256))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
{% endraw %} {% raw %}

crop_center_image[source]

crop_center_image(img:PIL.Image, size:int)

crop center PIL-image

{% endraw %} {% raw %}
il = ImageImageList.from_folder(set14_hr, after_open=partial(crop_center_image, size=256))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
{% endraw %} {% raw %}

split_luminance[source]

split_luminance(img:PIL.Image)

Y channel of YCbCr-Image

{% endraw %} {% raw %}
il = ImageImageList.from_folder(set14_hr, convert_mode='YCbCr', after_open=split_luminance)
print(il)
il[0].show(cmap='gray')
ImageImageList (14 items)
Image (1, 288, 352),Image (1, 276, 276),Image (1, 391, 586),Image (1, 512, 512),Image (1, 512, 512)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
{% endraw %} {% raw %}

after_open_image[source]

after_open_image(img:PIL.Image, size, scale:int=1, sizeup:bool=False, crop:bool=True, luminance:bool=False)

after_open function of ImageImageList

{% endraw %} {% raw %}
# Original
il = ImageImageList.from_folder(set14_hr)
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 288, 352),Image (3, 276, 276),Image (3, 391, 586),Image (3, 512, 512),Image (3, 512, 512)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# y when testing:
il = ImageImageList.from_folder(set14_hr, after_open=partial(after_open_image, size=256))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# x when training: if x.shape == y.shape
il = ImageImageList.from_folder(set14_hr, after_open=partial(after_open_image, size=256, scale=4, sizeup=True))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# x when training: if x.shape == y.shape // scale
il = ImageImageList.from_folder(set14_hr, after_open=partial(after_open_image, size=64, scale=4))
print(il)
il[0].show()
ImageImageList (14 items)
Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %} {% raw %}
# x when training: if x.shape == y.shape and channel is luminance only
il = ImageImageList.from_folder(set14_hr, after_open=partial(after_open_image, size=256, scale=4, sizeup=True, luminance=True))
print(il)
il[0].show(cmap='gray')
ImageImageList (14 items)
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %}

Transform

{% raw %}
doc(get_transforms)

get_transforms[source][test]

get_transforms(do_flip:bool=True, flip_vert:bool=False, max_rotate:float=10.0, max_zoom:float=1.1, max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75, p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None) → Collection[Transform]

Tests found for get_transforms:

  • pytest -sv tests/test_vision_data.py::test_image_to_image_different_tfms [source]
  • pytest -sv tests/test_vision_data.py::test_image_to_image_different_y_size [source]

To run tests please refer to this guide.

Utility func to easily create a list of flip, rotate, zoom, warp, lighting transforms.

Show in docs

{% endraw %} {% raw %}
{% endraw %} {% raw %}

get_sr_transforms[source]

get_sr_transforms(size, max_lighting:float=0.2, p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None)

transforms for super-resolution

{% endraw %} {% raw %}
get_sr_transforms(size=256)
([RandTransform(tfm=TfmPixel (crop), kwargs={'size': 256}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (brightness), kwargs={'change': (0.4, 0.6)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (contrast), kwargs={'scale': (0.8, 1.25)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True)],
 [RandTransform(tfm=TfmPixel (crop), kwargs={'size': 256}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True)])
{% endraw %} {% raw %}
tfms = get_sr_transforms(256)
il = ImageImageList.from_folder(set14_hr)
print(il)
img = il[0]
img.show()
img.apply_tfms(tfms[0]).show()
ImageImageList (14 items)
Image (3, 288, 352),Image (3, 276, 276),Image (3, 391, 586),Image (3, 512, 512),Image (3, 512, 512)
Path: /home/jovyan/notebook/datasets/Set14/HR
{% endraw %}

DataBunch

{% raw %}
{% endraw %} {% raw %}

create_sr_databunch[source]

create_sr_databunch(data_path:PosixPath, in_size:int, out_size:int, scale:int, bs:int, convert_mode:str='RGB', seed:int=1234)

create databunch for super-resolution

{% endraw %} {% raw %}
# in_size=64, out_size=256
data = create_sr_databunch(div2k_train_hr_crop_256, in_size=64, out_size=256, scale=4, bs=10, seed=8610)
print(data)
data.show_batch()
ImageDataBunch;

Train: LabelList (25245 items)
x: ImageImageList
Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Valid: LabelList (6311 items)
x: ImageImageList
Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64),Image (3, 64, 64)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Test: None
{% endraw %} {% raw %}
# in_size=256, out_size=256
data = create_sr_databunch(div2k_train_hr_crop_256, in_size=256, out_size=256, scale=4, bs=10, seed=8610)
print(data)
data.show_batch()
ImageDataBunch;

Train: LabelList (25245 items)
x: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Valid: LabelList (6311 items)
x: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Test: None
{% endraw %} {% raw %}
# in_size=256, out_size=256, luminance only
data = create_sr_databunch(div2k_train_hr_crop_256, in_size=256, out_size=256, scale=4, bs=10, convert_mode='YCbCr', seed=8610)
print(data)
data.show_batch(cmap='gray')
ImageDataBunch;

Train: LabelList (25245 items)
x: ImageImageList
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
y: ImageImageList
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Valid: LabelList (6311 items)
x: ImageImageList
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
y: ImageImageList
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Test: None
{% endraw %}

Why I don't use imagenet_stats when training on luminance only.

{% raw %}
def create_imagenet_databunch(data_path:PosixPath, in_size:int, out_size:int, scale:int, bs:int, convert_mode:str='RGB', seed:int=1234)->'DataBunch':
    """Create a super-resolution DataBunch that normalizes with imagenet_stats.

    Demonstration variant of create_sr_databunch: it always calls
    ``data.normalize(imagenet_stats, do_y=True)``, even when ``convert_mode='YCbCr'``
    produces 1-channel (luminance) images. Because imagenet_stats holds 3-channel
    mean/std tensors, the per-batch normalization broadcasts (bs, 1, H, W) batches
    up to (bs, 3, H, W) — the pitfall examined in the cells below.

    data_path: folder of HR images to load.
    in_size/out_size: crop size for x (input) and y (target) images.
    scale: downscale factor applied to x before (optional) re-upsizing.
    bs: batch size.
    convert_mode: PIL convert mode; 'YCbCr' triggers luminance-only handling.
    seed: seed for the random train/valid split.
    """
    # Luminance-only pipeline when images are opened as YCbCr.
    luminance = convert_mode == 'YCbCr'
    # When x and y share a size, the LR image must be upsized back to out_size.
    sizeup = in_size == out_size
    
    # x: low-resolution input; y: the unmodified (only cropped) original image.
    src = (ImageImageList.from_folder(data_path, convert_mode=convert_mode,
                                      after_open=partial(after_open_image, size=in_size, scale=scale, sizeup=sizeup, luminance=luminance))
        .split_by_rand_pct(seed=seed)
        .label_from_func((lambda x: x), label_cls=ImageImageList, convert_mode=convert_mode,
                         after_open=partial(after_open_image, size=out_size, luminance=luminance)))
    
    # Lighting transforms on x only; y gets the same crop but no lighting jitter.
    data = (src.transform(get_sr_transforms(size=in_size), tfm_y=True)
        .transform_y(get_sr_transforms(size=out_size, max_lighting=0))
        .databunch(path=Path('.'), bs=bs))
    # NOTE: imagenet_stats is 3-channel; with 1-channel (luminance) data the
    # broadcasting in _normalize_batch expands batches to 3 channels (see below).
    data.normalize(imagenet_stats, do_y=True)
    return data
{% endraw %} {% raw %}
data = create_imagenet_databunch(div2k_train_hr_crop_256, in_size=256, out_size=256, scale=4, bs=10, convert_mode='YCbCr', seed=8610)
{% endraw %} {% raw %}
# get one_batch
x, y = data.one_batch()
{% endraw %} {% raw %}
# the channel is 3 instead of 1.
x.shape
torch.Size([10, 3, 256, 256])
{% endraw %} {% raw %}
# the channel is 3 instead of 1.
x, y = next(iter(data.train_dl))
x.shape
torch.Size([10, 3, 256, 256])
{% endraw %} {% raw %}
# the shape of data.x is correct.
print(data.x)
data.x[0].shape
ImageImageList (25245 items)
Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256),Image (1, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256
torch.Size([1, 256, 256])
{% endraw %} {% raw %}
# the shape of data.x is correct.
print(data.train_ds[0])
x, y = data.train_ds[0]
x.shape
(Image (1, 256, 256), Image (1, 256, 256))
torch.Size([1, 256, 256])
{% endraw %} {% raw %}
# train_dl class is ...
type(data.train_dl)
fastai.basic_data.DeviceDataLoader
{% endraw %}
class DeviceDataLoader():
    def __iter__(self):
        "Process and returns items from `DataLoader`."
        for b in self.dl: yield self.proc_batch(b)
{% raw %}
# the shape of train_dl.dl(Pytorch class) is correct.
x, y = next(iter(data.train_dl.dl))
x.shape
torch.Size([10, 1, 256, 256])
{% endraw %} {% raw %}
# The shape changes when you call DeviceDataLoader.proc_batch()
b = next(iter(data.train_dl.dl))
x, y = data.train_dl.proc_batch(b)
x.shape
torch.Size([10, 3, 256, 256])
{% endraw %}
class DeviceDataLoader():
    def proc_batch(self,b:Tensor)->Tensor:
        "Process batch `b` of `TensorImage`."
        b = to_device(b, self.device)
        for f in listify(self.tfms): b = f(b)
        return b
{% raw %}
# The shape is changed in _normalize_batch
data.train_dl.tfms
[functools.partial(<function _normalize_batch at 0x7fd9c69687b8>, mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]), do_x=True, do_y=True)]
{% endraw %}

fastai.vision.data.py

def normalize(x:TensorImage, mean:FloatTensor,std:FloatTensor)->TensorImage:
    "Normalize `x` with `mean` and `std`."
    return (x-mean[...,None,None]) / std[...,None,None]

def _normalize_batch(b:Tuple[Tensor,Tensor], mean:FloatTensor, std:FloatTensor, do_x:bool=True, do_y:bool=False)->Tuple[Tensor,Tensor]:
    "`b` = `x`,`y` - normalize `x` array of imgs and `do_y` optionally `y`."
    x,y = b
    mean,std = mean.to(x.device),std.to(x.device)
    if do_x: x = normalize(x,mean,std)
    if do_y and len(y.shape) == 4: y = normalize(y,mean,std)
    return x,y

The shape changed because .normalize(imagenet_stats) was specified when creating the DataBunch. So I don't specify imagenet_stats for 1-channel images.

Metrics

{% raw %}
{% endraw %} {% raw %}

extract_y[source]

extract_y(img:Tensor)

extract luminance. img is RGB, input range is [0...1], output range is [0..255]

{% endraw %} {% raw %}
{% endraw %} {% raw %}

denorm_img[source]

denorm_img(x:Tensor)

de normalize image

{% endraw %} {% raw %}
{% endraw %} {% raw %}

m_psnr[source]

m_psnr(img1:Tensor, img2:Tensor)

metrics: peak_signal_noise_ratio

{% endraw %} {% raw %}
{% endraw %} {% raw %}

m_ssim[source]

m_ssim(img1, img2)

metrics: structural_similarity

{% endraw %}

Training

{% raw %}
train_hr = div2k_train_hr_crop_256
{% endraw %} {% raw %}
in_size = 256
out_size = 256
scale = 4
bs = 10
{% endraw %} {% raw %}
data = create_sr_databunch(train_hr, in_size=in_size, out_size=out_size, scale=scale, bs=bs, seed=seed)
print(data)
data.show_batch()
ImageDataBunch;

Train: LabelList (25245 items)
x: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Valid: LabelList (6311 items)
x: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
y: ImageImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
Path: /home/jovyan/notebook/datasets/DIV2K/DIV2K_train_HR_crop/256;

Test: None
{% endraw %}

Image super-resolution using deep convolutional networks

https://arxiv.org/abs/1501.00092

31 Dec 2014

{% raw %}
model = SRCNN()
loss_func = MSELossFlat()
metrics = [m_psnr]
learn = Learner(data, model, loss_func=loss_func, metrics=metrics)
learn.path = Path('.')
model_name = model.__class__.__name__
{% endraw %} {% raw %}
lr_find(learn)
learn.recorder.plot(suggestion=True)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
Min numerical gradient: 8.32E-04
Min loss divided by 10: 2.29E-03
{% endraw %} {% raw %}
lr = 1e-3
lrs = slice(lr)
epoch = 3
pct_start = 0.3
wd = 1e-3
save_fname = model_name
{% endraw %} {% raw %}
callbacks = [ShowGraph(learn), SaveModelCallback(learn, name=save_fname)]
{% endraw %} {% raw %}
learn.fit_one_cycle(epoch, lrs, pct_start=pct_start, wd=wd, callbacks=callbacks)
epoch train_loss valid_loss m_psnr time
0 0.111328 0.073401 28.265835 01:34
1 0.096555 0.063644 30.407476 01:34
2 0.094229 0.059933 31.025671 01:28
Better model found at epoch 0 with valid_loss value: 0.07340064644813538.
Better model found at epoch 1 with valid_loss value: 0.06364426016807556.
Better model found at epoch 2 with valid_loss value: 0.059933461248874664.
{% endraw %} {% raw %}
learn.show_results()
{% endraw %}

Test

{% raw %}
test_hr = set14_hr
{% endraw %} {% raw %}
il_test_x = ImageImageList.from_folder(test_hr, after_open=partial(after_open_image, scale=4, sizeup=True, size=out_size))
il_test_y = ImageImageList.from_folder(test_hr, after_open=partial(after_open_image, size=out_size))
{% endraw %} {% raw %}
{% endraw %} {% raw %}

reconstruct_image[source]

reconstruct_image(t:Tensor)

create Image from Tensor

{% endraw %} {% raw %}
{% endraw %} {% raw %}

sr_predict[source]

sr_predict(learn:Learner, item:ItemBase)

predict for super-resolution

{% endraw %} {% raw %}
{% endraw %} {% raw %}

get_metrics[source]

get_metrics(img1:Tensor, img2:Tensor)

psnr and ssim

{% endraw %} {% raw %}
{% endraw %} {% raw %}

fmt_metrics[source]

fmt_metrics(metrics)

format of metrics

{% endraw %} {% raw %}
{% endraw %} {% raw %}

mean_metrics[source]

mean_metrics(metrics)

mean of metrics

{% endraw %} {% raw %}
{% endraw %} {% raw %}

sr_test[source]

sr_test(learn:Learner, il_test_x:ImageImageList, il_test_y:ImageImageList, model_name:str, cmap:str=None)

test for super-resolution

{% endraw %} {% raw %}
_ = learn.load(save_fname)
{% endraw %} {% raw %}
sr_test(learn, il_test_x, il_test_y, model_name)
# Official: bicubic: PSNR:25.99, SSIM:0.7486
# Official: SRCNN:   PSNR:27.50, SSIM:0.7513
bicubic: PSNR:24.11,SSIM:0.7822
SRCNN:	 PSNR:24.68,SSIM:0.8057
{% endraw %} {% raw %}
{% endraw %} {% raw %}

sr_test_upscale[source]

sr_test_upscale(learn:Learner, il_x:ImageImageList, il_y:ImageImageList, il_x_up:ImageImageList, model_name:str, cmap:str=None)

run the super-resolution test in which the model itself performs the upscaling

{% endraw %}